import pandas as pd
import numpy as np
import seaborn as sb
import altair as alt
from vega_datasets import data
# Turn off Altair's row-count limit so the larger samples taken from the
# dataset further down (up to 20000 rows) can be passed to alt.Chart.
alt.data_transformers.disable_max_rows()
# The next line is the notebook's echoed return value of the call above, not a
# statement. Left as code it raised NameError (DataTransformerRegistry is never
# imported in this file), so it is preserved here as a comment only:
#   DataTransformerRegistry.enable('default')

# Load the diamonds dataset and give the x/y/z dimension columns readable names.
d = sb.load_dataset('diamonds')
d.rename(
    columns={'x': 'length', 'y': 'width', 'z': 'depth_in_mm'},
    inplace=True,
)

# Working sample: 4000 rows drawn without replacement, re-indexed from 0.
df = d.sample(n=4000, replace=False, ignore_index=True)
df.head()
| | carat | cut | color | clarity | depth | table | price | length | width | depth_in_mm |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.24 | Ideal | G | VS2 | 62.1 | 56.0 | 8504 | 6.87 | 6.91 | 4.28 |
| 1 | 0.42 | Ideal | F | SI1 | 60.9 | 55.0 | 992 | 4.88 | 4.84 | 2.96 |
| 2 | 1.26 | Premium | G | SI2 | 59.1 | 59.0 | 5899 | 7.09 | 7.05 | 4.18 |
| 3 | 1.70 | Ideal | F | VS2 | 62.4 | 57.0 | 17360 | 7.65 | 7.57 | 4.75 |
| 4 | 0.70 | Very Good | E | VS1 | 62.0 | 60.0 | 3109 | 5.61 | 5.64 | 3.49 |
# Linked view: carat/price scatter, two category bar charts and a clickable
# cut legend, all driven by the two selections below.

# Interval selection; attached to the scatter plot and used to filter the bars.
brush = alt.selection_interval()
# Point selection projected onto the `cut` field; attached to every chart.
click = alt.selection_point(fields=['cut'])


def _category_count_bars(frame, field, heading, interval, point):
    """Build a bar chart of record counts per value of *field*.

    Counts go on x and the nominal *field* on y. Bars are coloured by `cut`,
    the data is filtered by *interval*, and bars not matching *point* get
    opacity 0. The y domain is pinned to the values of *field* present in
    *frame* (in order of first appearance).
    """
    categories = frame[field].unique().tolist()
    count_axis = alt.Axis(grid=False, format='s')
    return (
        alt.Chart(frame, width=350)
        .mark_bar()
        .encode(
            x=alt.X('count()', stack=None, title=None, axis=count_axis),
            y=alt.Y(f'{field}:N', title=None, scale=alt.Scale(domain=categories)),
            color=alt.Color('cut', legend=None),
            opacity=alt.condition(point, alt.value(1), alt.value(0)),
        )
        .properties(title=heading)
        .transform_filter(interval)
        .add_params(point)
    )


# Hand-made legend: one square per cut, greyed out when it is not selected.
legend = (
    alt.Chart(df)
    .mark_square(size=85)
    .encode(
        y=alt.Y(
            'cut',
            title=None,
            axis=alt.Axis(orient='right', labelFontWeight=600),
        ),
        color=alt.condition(
            click,
            alt.Color('cut:N', legend=None),
            alt.value('lightgrey'),
        ),
    )
    .properties(title='Cut')
    .add_params(click)
)

# Scatter plot: coloured by cut inside the brush, light grey outside it, and
# filtered by the cut selection.
scatter = (
    alt.Chart(df, width=350, height=360)
    .mark_circle(size=35)
    .encode(
        x=alt.X('carat:Q', title='Carat', axis=alt.Axis(grid=False)),
        y=alt.Y('price:Q', title='Price', axis=alt.Axis(format='$s', grid=False)),
        color=alt.condition(
            brush,
            alt.Color('cut:N', legend=None),
            alt.value('lightgrey'),
        ),
        opacity=alt.condition(click, alt.value(1), alt.value(0)),
    )
    .properties(title='Carat vs Price')
    .add_params(brush, click)
    .transform_filter(click)
)

# Clarity bar chart
bar1 = _category_count_bars(df, 'clarity', 'Distribution of Clarity', brush, click)
# Color bar chart
bar2 = _category_count_bars(df, 'color', 'Distribution of Color', brush, click)

# Layout: scatter on the left, the two bar charts stacked in the middle,
# legend on the right.
(scatter | (bar1 & bar2) | legend).configure_view(strokeWidth=0).configure_title(fontSize=12)
# Formatting parameters, each bound to an input widget.
color = alt.param(
    value='steelblue',
    bind=alt.binding(input='color', name='Color: '),
)
size = alt.param(
    value=35,
    bind=alt.binding(input='range', min=10, max=80, step=5, name='Size: '),
)
titlesize = alt.param(
    value=12,
    bind=alt.binding(input='range', min=5, max=18, step=1, name='Title Size: '),
)

# Selections
# Point selection on the (cut, clarity) pair, triggered on pointer-over.
click = alt.selection_point(fields=['cut', 'clarity'], on='pointerover')
# NOTE(review): this x-only interval selection is not attached to any chart in
# this section.
brush = alt.selection_interval(encodings=['x'])


def _carat_scatter(frame, y_channel, heading, hover, fill, point_size, heading_size):
    """Build a point chart of *y_channel* against carat.

    Mark colour, mark size and title font size are driven by the *fill*,
    *point_size* and *heading_size* parameters; points not matching *hover*
    get opacity 0. All four parameters are registered on the returned chart.
    """
    chart_title = alt.TitleParams(heading, fontSize=heading_size)
    return (
        alt.Chart(frame)
        .mark_point(color=fill, size=point_size)
        .encode(
            x=alt.X('carat', title='Carat'),
            y=y_channel,
            opacity=alt.condition(hover, alt.value(1), alt.value(0)),
        )
        .properties(title=chart_title)
        .add_params(hover, fill, point_size, heading_size)
    )


# Creating legend: a cut x clarity grid of rects coloured by record count,
# light grey where the cell does not match the selection.
legend = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X('cut', title=None, axis=alt.Axis(labelFontWeight=600)),
        y=alt.Y(
            'clarity',
            title=None,
            axis=alt.Axis(orient='right', labelFontWeight=600),
        ),
        color=alt.condition(
            click,
            alt.Color('count()', legend=None),
            alt.value('lightgrey'),
        ),
    )
    .properties(title=alt.TitleParams('Cut and Clarity', fontSize=11))
    .add_params(click)
)

scatter1 = _carat_scatter(
    df,
    alt.Y('price', title='Price', axis=alt.Axis(format='$s')),
    'Price vs Carat',
    click, color, size, titlesize,
)
scatter2 = _carat_scatter(
    df,
    alt.Y('length', title='Length'),
    'Length vs Carat',
    click, color, size, titlesize,
)

(scatter1 | scatter2 | legend).configure_axis(grid=False).configure_view(strokeWidth=0)
# Larger working sample for this view: 20000 rows without replacement.
df = d.sample(n=20000, replace=False, ignore_index=True)

# Binding cut and clarity: one radio group per field, each with a leading
# None option labelled 'All'.
cut = df['cut'].unique().tolist()
button1 = alt.binding_radio(
    name='Cut type: ',
    options=[None, *cut],
    labels=['All', *cut],
)
clarity = df['clarity'].unique().tolist()
button2 = alt.binding_radio(
    name='clarity type: ',
    options=[None, *clarity],
    labels=['All', *clarity],
)

# Selection point with both radio buttons
click = alt.selection_point(
    fields=['cut', 'clarity'],
    bind={'cut': button1, 'clarity': button2},
)


def _binned_histogram(frame, field, axis_title, heading, picker):
    """Build a histogram of the quantitative *field* (at most 40 bins).

    Bars are filled by `cut` and outlined by `clarity` where they match
    *picker* (white otherwise), get opacity 0 where they do not match, and
    the data is additionally filtered by *picker*.
    """
    fill = alt.condition(picker, alt.Color('cut:N', legend=None), alt.value('white'))
    outline = alt.condition(picker, alt.Color('clarity:N', legend=None), alt.value('white'))
    visibility = alt.condition(picker, alt.value(1), alt.value(0))
    return (
        alt.Chart(frame)
        .mark_bar()
        .encode(
            x=alt.X(f'{field}:Q', title=axis_title, bin=alt.Bin(maxbins=40)),
            y=alt.Y('count()', title='Frequency', stack=None),
            color=fill,
            stroke=outline,
            opacity=visibility,
        )
        .properties(title=heading)
        .add_params(picker)
        .transform_filter(picker)
    )


hist1 = _binned_histogram(df, 'table', 'Table', 'Diamond Table Distribution', click)
hist2 = _binned_histogram(df, 'length', 'Length', 'Diamond Length Distribution', click)

# Length against table, with neither axis forced to include zero; filtered by
# the same radio-button selection.
scatter = (
    alt.Chart(df, title='Diamond Length vs Table')
    .mark_circle(size=40)
    .encode(
        x=alt.X('table', title='Table', scale=alt.Scale(zero=False)),
        y=alt.Y('length', title='Length', scale=alt.Scale(zero=False)),
    )
    .transform_filter(click)
    .add_params(click)
)

(scatter | hist1 | hist2).configure_axis(grid=False).configure_view(strokeWidth=0)
# Back to a 4000-row sample for the fully parameterised bar chart.
df = d.sample(n=4000, replace=False, ignore_index=True)


def _slider_param(label, start, low, high, step):
    """Return a parameter with initial value *start*, bound to a range input."""
    return alt.param(
        value=start,
        bind=alt.binding(input='range', min=low, max=high, step=step, name=label),
    )


def _color_param(label, start):
    """Return a parameter with initial value *start*, bound to a color input."""
    return alt.param(value=start, bind=alt.binding(input='color', name=label))


# --- Title styling ---
# NOTE(review): the initial value 'serif' is not one of the radio options, so
# presumably no radio button starts out selected - confirm this is intended.
titlefont = alt.param(
    value='serif',
    bind=alt.binding(
        input='radio',
        options=['Georgia', 'Arial', 'Verdana', 'Times New Roman'],
        name='Title font ',
    ),
)
titlesize = _slider_param('Title size ', 14, 5, 20, 1)
titleweight = _slider_param('Title weight ', 700, 500, 900, 200)
titledx = _slider_param('Title dx ', 0, -80, 80, 1)
titlecolor = _color_param('Title color ', 'black')

# --- Bar styling ---
size = _slider_param('Bar size ', 30, 5, 30, 1)
color = _color_param('Bar Color ', 'steelblue')
strokecolor = _color_param('Stroke color ', 'steelblue')
opacity = _slider_param('Opacity ', 1, 0, 1, 0.05)

# --- Axis label styling ---
xsize = _slider_param('Axis size ', 10, 5, 20, 1)
xcolor = _color_param('Axis color ', 'black')

# --- Count label styling ---
# The horizontal text alignment accepts 'left', 'center' or 'right'; the
# previous 'middle' option is a baseline value, not an alignment, so choosing
# it handed the text mark an unsupported align value.
textalign = alt.param(
    value='left',
    bind=alt.binding(
        input='select',
        options=['left', 'center', 'right'],
        name='Text align ',
    ),
)
textdx = _slider_param('Text dx ', 3, -20, 20, 1)
textdy = _slider_param('Text dy ', 0, -20, 20, 1)
textangle = _slider_param('Text angle', 0, -180, 180, 1)

# title
title = alt.TitleParams(
    'Total Number of Diamond Cut Variations',
    dx=titledx,
    font=titlefont,
    fontSize=titlesize,
    fontWeight=titleweight,
    color=titlecolor,
)

# Bar Chart: record count per cut, with the x axis hidden. The y-axis labels
# reuse the title's font and weight parameters.
bar = (
    alt.Chart(df, width=400, height=160, title=title)
    .mark_bar(size=size, color=color, stroke=strokecolor, opacity=opacity)
    .encode(
        x=alt.X('count()', axis=None),
        y=alt.Y(
            'cut',
            title=None,
            axis=alt.Axis(
                labelFont=titlefont,
                labelFontWeight=titleweight,
                ticks=False,
                labelPadding=5,
                labelFontSize=xsize,
                labelColor=xcolor,
            ),
        ),
    )
    .add_params(
        titlefont,
        titlesize,
        titleweight,
        titledx,
        titlecolor,
        xsize, xcolor,
        size, color,
        strokecolor, opacity,
    )
)

# Text: derived from `bar`, swapping the mark for text that shows the count.
text = (
    bar.mark_text(
        align=textalign,
        dx=textdx,
        dy=textdy,
        angle=textangle,
        fontWeight=titleweight,
        font=titlefont,
    )
    .encode(text=alt.Text('count()'))
    .add_params(textalign, textdx, textdy, textangle)
)

chart = bar + text
chart.configure_view(strokeWidth=0)